package org.apache.lucene.codecs.lucene40;
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Bits.MatchNoBits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

// TODO: really this should be in BaseTestPF or somewhere else? useful test!
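/**
 * Checks the reuse behaviour of {@link TermsEnum#postings(PostingsEnum, int)} under the
 * Lucene40 postings format: an enum handed back in as the reuse candidate may be returned
 * again, while an enum obtained from a different reader must never be reused. Each test
 * counts distinct instances with an {@link IdentityHashMap}, since reuse means getting the
 * very same object back. The pattern exercised below is roughly (names illustrative):
 *
 * <pre>
 * PostingsEnum reuse = null;
 * while (termsEnum.next() != null) {
 *   reuse = termsEnum.postings(reuse, PostingsEnum.FREQS); // may return the same instance
 * }
 * </pre>
 */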
public class TestReuseDocsEnum extends LuceneTestCase {

  public void testReuseDocsEnumNoReuse() throws IOException {
    Directory dir = newDirectory();
    Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();

    DirectoryReader open = DirectoryReader.open(dir);
    for (LeafReaderContext ctx : open.leaves()) {
      LeafReader indexReader = ctx.reader();
      Terms terms = indexReader.terms("body");
      TermsEnum iterator = terms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
      // a null reuse argument must always produce a fresh enum
      while ((iterator.next()) != null) {
        PostingsEnum docs = iterator.postings(null, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }

      // one distinct instance per term
      assertEquals(terms.size(), enums.size());
    }
    writer.commit();
    IOUtils.close(writer, open, dir);
  }

  // tests that the enum is reused when the previous instance is passed back in
  public void testReuseDocsEnumSameBitsOrNull() throws IOException {
    Directory dir = newDirectory();
    Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(new MockAnalyzer(random())).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();

    DirectoryReader open = DirectoryReader.open(dir);
    for (LeafReaderContext ctx : open.leaves()) {
      Terms terms = ctx.reader().terms("body");
      TermsEnum iterator = terms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
      PostingsEnum docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.postings(docs, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }

      // the enum passed back in is reused for every term
      assertEquals(1, enums.size());

      enums.clear();
      iterator = terms.iterator();
      docs = null;
      while ((iterator.next()) != null) {
        docs = iterator.postings(docs, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }
      assertEquals(1, enums.size());
    }
    writer.close();
    IOUtils.close(open, dir);
  }

  // make sure we never reuse an enum from another reader, even if it is the same field & codec etc.
  public void testReuseDocsEnumDifferentReader() throws IOException {
    Directory dir = newDirectory();
    Codec cp = TestUtil.alwaysPostingsFormat(new Lucene40RWPostingsFormat());
    MockAnalyzer analyzer = new MockAnalyzer(random());
    analyzer.setMaxTokenLength(TestUtil.nextInt(random(), 1, IndexWriter.MAX_TERM_LENGTH));

    RandomIndexWriter writer = new RandomIndexWriter(random(), dir,
        newIndexWriterConfig(analyzer).setCodec(cp));
    int numdocs = atLeast(20);
    createRandomIndex(numdocs, writer, random());
    writer.commit();

    DirectoryReader firstReader = DirectoryReader.open(dir);
    DirectoryReader secondReader = DirectoryReader.open(dir);
    List<LeafReaderContext> leaves = firstReader.leaves();
    List<LeafReaderContext> leaves2 = secondReader.leaves();

    for (LeafReaderContext ctx : leaves) {
      Terms terms = ctx.reader().terms("body");
      TermsEnum iterator = terms.iterator();
      IdentityHashMap<PostingsEnum, Boolean> enums = new IdentityHashMap<>();
      MatchNoBits bits = new Bits.MatchNoBits(firstReader.maxDoc());
      PostingsEnum docs = null;
      BytesRef term = null;
      // offer a reuse candidate that comes from the second reader: it must never be reused
      while ((term = iterator.next()) != null) {
        docs = iterator.postings(randomDocsEnum("body", term, leaves2, bits), random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }
      assertEquals(terms.size(), enums.size());

      iterator = terms.iterator();
      enums.clear();
      docs = null;
      while ((term = iterator.next()) != null) {
        docs = iterator.postings(randomDocsEnum("body", term, leaves2, bits), random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
        enums.put(docs, true);
      }
      assertEquals(terms.size(), enums.size());
    }
    writer.close();
    IOUtils.close(firstReader, secondReader, dir);
  }

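  /**
   * Looks up {@code term} in the {@code field} terms of a randomly chosen leaf from
   * {@code readers} and returns a fresh {@link PostingsEnum} for it, or {@code null}
   * (roughly 10% of the time, or when the field or term is absent). Used by
   * {@link #testReuseDocsEnumDifferentReader()} to offer a reuse candidate that comes
   * from a different reader.
   */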
  public PostingsEnum randomDocsEnum(String field, BytesRef term, List<LeafReaderContext> readers, Bits bits) throws IOException {
    // roughly 10% of the time hand back no reuse candidate at all
    if (random().nextInt(10) == 0) {
      return null;
    }
    LeafReader indexReader = readers.get(random().nextInt(readers.size())).reader();
    Terms terms = indexReader.terms(field);
    if (terms == null) {
      return null;
    }
    TermsEnum iterator = terms.iterator();
    if (iterator.seekExact(term)) {
      return iterator.postings(null, random().nextBoolean() ? PostingsEnum.FREQS : PostingsEnum.NONE);
    }
    return null;
  }

  /**
   * Populates a writer with random documents; this must be fully reproducible with
   * the seed!
   */
  public static void createRandomIndex(int numdocs, RandomIndexWriter writer,
      Random random) throws IOException {
    LineFileDocs lineFileDocs = new LineFileDocs(random);

    for (int i = 0; i < numdocs; i++) {
      writer.addDocument(lineFileDocs.nextDoc());
    }

    lineFileDocs.close();
  }

}